链接地址:http://www.slideshare.net/mongodb/mongodb-indexing-the-details
db.c.find({x: 6}).limit(1);
db.c.ensureIndex({x: 1});
{
'indexBounds': {
'x': [
[6, 6]
]
},
'nscanned': 1, // 扫描的行数
'nscannedObjects': 1, // 扫描的对象
'n': 1 // 返回的行数
}
db.c.find({x: 6});
db.c.ensureIndex({x: 1});
{
'indexBounds': {
'x': [
[6, 6]
]
},
'nscanned': 3,
'nscannedObjects': 3,
'n': 3
}
db.c.find({x: 6, y: 1});
db.c.ensureIndex({x: 1});
{
'indexBounds': {
'x': [
[6, 6]
]
},
'nscanned': 3,
'nscannedObjects': 3,
'n': 1
}
Documents for all matching keys scanned, but only one document matched on non index keys.
db.c.find({x: {$gte: 4, $lte: 7}});
db.c.ensureIndex({x: 1});
{
'indexBounds': {
'x': [
[4, 7]
]
},
'nscanned': 4,
'nscannedObjects': 4,
'n': 4
}
db.c.find(x: {$gt: 4, $lt: 7});
db.c.ensureIndex({x: 1});
Explain doesn’t indicate that the range is exclusive
But index keys matching the range bounds are not scanned because the bounds are exclusive.
db.c.find({x: {$gt: 7}});
db.c.ensureIndex({x: 1});
{
'indexBounds': {
'x': [
[7, 1.7976931348623157e+308]
]
},
'nscanned': 2,
'nscannedObjects': 2,
'n': 1
}
All keys in valid range are scanned, but the matcher rejects duplicate documents making n == 1.
db.c.find({x: {$gt: 4, $lt: 7}});
db.c.find({x: {$gt: 4}});
db.c.find({x: {$ne: 4}});
db.c.find({x: /^a/});
db.c.find({x: /a/})
{
'indexBounds': {
'x': [
[4, 7]
]
}
}
{
'indexBounds': {
'x': [
[4, 1.7976931348623157e+308]
]
}
}
{
'indexBounds': {
'x': [
[{
'$minElement': 1
}, 4],
[4, {
'$maxElement': 1
}]
]
}
}
{
'indexBounds': {
'x': [
['a', 'b'],
[/^a/, /^a/]
]
}
}
{
'indexBounds': {
'x': [
['', {}],
[/a/, /a/]
]
}
}
db.c.find({x: {$in: [3, 6]}});
db.c.ensureIndex({x: 1});
{
'indexBounds': {
'x': [
[3, 3],
[6, 6]
]
},
'nscanned': 3,
'nscannedObjects': 2,
'n': 2
}
Why is nscanned 3?
This is an algorithmic detail we’ll discuss more later, but when there are disjoint ranges for a key nscanned may be higher than the number of matching keys.
db.c.find({x: {$all: [3, 6]}});
db.c.ensureIndex({x: 1});
The first entry in the $all match array is always used for index bounds. Note this may not be the least numerous indexed value in the $all array.
{
'indexBounds': {
'x': [
[3, 3]
]
},
'nscanned': 1,
'nscannedObjects': 1,
'n': 1
}
db.c.find({x: {$lt: 6}, y: 3}).limit(3);
db.c.ensureIndex({x: 1});
{
'indexBounds': {
'x': [
[-1.7976931348623157e+308, 6]
]
}
}
Scan until three matches are found, then stop;
db.c.find({x: {$lt: 6}, y: 3}).skip(3);
db.c.ensureIndex({x: 1});
{
'indexBounds': {
'x': [
[-1.7976931348623157e+308, 6]
]
}
}
All skipped documents are scanned.
db.c.find({x: {$lt: 6}).sort({x: 1});
db.c.ensureIndex({x: 1});
'cursor': 'BtreeCursor x_1'
db.c.find({x: {$lt: 6}).sort({y: 1});
db.c.ensureIndex({x: 1});
Results are sorted on the fly to match requested order.
The scanAndOrder field is only printed when its value is true.
db.c.count({x: {$gte: 4, $lte: 7}});
db.c.ensureIndex({x: 1});
We’re just counting keys here, not loading the full documents.
Id would be returned by default, but isn’t in the index so we need to exclude to return only indexed fields.
{
'isMultiKey': false,
'indexOnly': true
}
> db.c.find({x: 6}, {x: 1, _id: 0}).explain();
{
'cursor': 'BtreeCursor x_1',
'nscanned': 1,
'nscannedObjects': 1,
'n': 1,
'millis': 0,
'nYields': 0,
'nChunkSkips': 0,
'isMultiKey': true,
'indexOnly': false,
'indexBounds': {
'x': [
[6, 6]
]
}
}
Currently we set isMultiKey to true the first time we save a doc where the field is a multikey array.
But when all multikey docs are removed we don’t reset isMultiKey.
This can be improved.
{
'isMultiKey': true,
'indexOnly': false
}
db.c.find({x: {$gte: 4, $lte: 7}}, {$set: {x: 2}});
db.c.ensureIndex({x: 1});
db.c.find({x: 5, y: 'c'});
db.c.ensureIndex({x: 1, y: 1});
{
'indexBounds': {
'x': [
[5, 5]
],
'y': [
['c', 'c']
]
},
'nscanned': 1,
'nscannedObjects': 1,
'n': 1
}
db.c.find({x: 5, y: {$in: ['c', 'f']}});
db.c.ensureIndex({x: 1, y: 1});
{
'indexBounds': {
'x': [
[5, 5]
],
'y': [
['c', 'c'],
['f', 'f']
]
},
'nscanned': 3,
'nscannedObjects': 2,
'n': 2
}
db.c.find({x: 5, y: {$gte: 'd'}});
db.c.ensureIndex({x: 1, y: 1});
{
'indexBounds': {
'x': [
[5, 5]
],
'y': [
'd',
{}
]
},
'nscanned': 2,
'nscannedObjects': 2,
'n': 2
}
db.c.find({x: {$in: [5, 9]}, y: {$in: ['c', 'f']}});
db.c.ensureIndex({x: 1, y: 1});
{
'indexBounds': {
'x': [
[5, 5],
[9, 9]
],
'y': [
['c', 'c'],
['f', 'f']
]
},
'nscanned': 5,
'nscannedObjects': 3,
'n': 3
}
db.c.find({x: {$in: [5, 9]}, y: {$lte: 'd'}});
db.c.ensureIndex({x: 1, y: 1});
{
'indexBounds': {
'x': [
[5, 5],
[9, 9]
],
'y': [
['', 'd']
]
},
'nscanned': 5,
'nscannedObjects': 3,
'n': 3
}
db.c.find({x: {$gte: 4}, y: 'c'});
db.c.ensureIndex({x: 1, y: 1});
{
'indexBounds': {
'x': [
[4, 1.7976931348623157e+30]
],
'y': [
['c', 'c']
]
},
'nscanned': 7,
'nscannedObjects': 2,
'n': 2
}
High nscanned because every distinct value of x must be checked.
Every distinct value of x must be checked.
db.c.find({x: {$gte: 4}, y: {$in: ['c', 'a']}});
db.c.ensureIndex({x: 1, y: 1});
{
'indexBounds': {
'x': [
[4, 1.7976931348623157e+30]
],
'y': [
['a', 'a'],
['c', 'c']
]
},
'nscanned': 7,
'nscannedObjects': 3,
'n': 3
}
Every distinct value of x must be checked for y values ‘a’ and ‘c’
db.c.find({x: {$gte: 3, $lte: 7}, y: {$gte: 'c', $lte: 'f'}});
db.c.ensureIndex({x: 1, y: 1});
{
'indexBounds': {
'x': [
[3, 7]
],
'y': [
['c', 'f']
]
},
'nscanned': 6,
'nscannedObjects': 4,
'n': 4
}
db.c.find({$or: [{x: 5}, {y: 'd'}]});
db.c.ensureIndex({x: 1, y: 1});
> db.c.find({$or: [{x: 5}, {y: 'd'}]})
{
'clauses': [
{
'cursor': 'BtreeCursor x_1',
'nscanned': 2,
'nscannedObjects': 2,
'n': 2,
'millis': 0,
'nYield': 0,
'nChunkSkips': 0,
'isMultiKey': false,
'indexOnly': false,
'indexBounds': {
'x': [
[5, 5]
]
}
},
{
'cursor': 'BtreeCursor y_1',
'nscanned': 2,
'nscannedObjects': 2,
'n': 1,
'millis': 1,
'nYield': 0,
'nChunkSkips': 0,
'isMultiKey': false,
'indexOnly': false,
'indexBounds': {
'y': [
['d', 'd']
]
}
}
],
'nscanned': 4,
'nscannedObjects': 4,
'n': 3,
'millis': 1
}
{
'nscanned': 4,
'nscannedObjects': 4,
'n': 3,
'millis': 1
}
We have already scanned the x index for x:5.
So this document was returned already.
We don’t return it again.
db.c.find({$or: [{x: 5}, {y: 'd'}]});
db.c.ensureIndex({x: 1}); // no index on y
Since y is not indexed, we must do a full collection scan to match y:’d’.
Since a full scan is required, we don’t use the index on x to match x:5.
db.c.find({$or: [{x: {$gt: 2, $lt: 6}}, {x: 5}]});
db.c.ensureIndex({x: 1});
The index range of the second clause is included in the index range of the first clause, so we use the first index range only.
db.c.find({$or: [{x: {$gt: 2, $lt: 6}, y: 'c'}, {x: 5, y: 'd'}]});
db.c.ensureIndex({x: 1});
The index range for the first clause contains the index range for the second clause, so all matching is done using the index range for the first clause.
db.c.find({$or: [{x: {$gt: 2, $lt: 6}}, {x: {$gt: 4, $lt: 7}}]});
db.c.ensureIndex({x: 1, y: 1});
> db.c.find({$or: [{x: {$gt: 2, $lt: 6}}, {x: {$gt: 4, $lt: 7}}]});
{
'clauses': [
{
'cursor': 'BtreeCursor x_1',
'nscanned': 3,
'nscannedObjects': 3,
'n': 3,
'millis': 0,
'nYield': 0,
'nChunkSkips': 0,
'isMultiKey': false,
'indexOnly': false,
'indexBounds': {
'x': [
[2, 6]
]
}
},
{
'cursor': 'BtreeCursor x_1',
'nscanned': 1,
'nscannedObjects': 1,
'n': 1,
'millis': 1,
'nYield': 0,
'nChunkSkips': 0,
'isMultiKey': false,
'indexOnly': false,
'indexBounds': {
'x': [
[6, 7]
]
}
}
],
'nscanned': 4,
'nscannedObjects': 4,
'n': 4,
'millis': 1
}
The index range scanned for the previous clause is removed.
db.c.find({
$or: [
{x: {$gt: 2, $lt: 6}, y: {$gt: 'b', $lt: 'f'}},
{x: {$gt: 4, $lt: 7}, y: {$gt: 'b', $lt: 'e'}}
]
});
db.c.ensureIndex({x: 1, y: 1});
> db.c.find({
$or: [
{x: {$gt: 2, $lt: 6}, y: {$gt: 'b', $lt: 'f'}},
{x: {$gt: 4, $lt: 7}, y: {$gt: 'b', $lt: 'e'}}
]
});
{
'clauses': [
{
'cursor': 'BtreeCursor x_1_y_1',
'nscanned': 4,
'nscannedObjects': 3,
'n': 3,
'millis': 1,
'nYield': 0,
'nChunkSkips': 0,
'isMultiKey': false,
'indexOnly': false,
'indexBounds': {
'x': [
[2, 6]
],
'y': [
['b', 'f']
]
}
},
{
'cursor': 'BtreeCursor x_1_y_1',
'nscanned': 0,
'nscannedObjects': 0,
'n': 0,
'millis': 1,
'nYield': 0,
'nChunkSkips': 0,
'isMultiKey': false,
'indexOnly': false,
'indexBounds': {
'x': [
[6, 7]
],
'y': [
['b', 'e']
]
}
}
],
'nscanned': 4,
'nscannedObjects': 3,
'n': 3
}
The index range scanned for the previous clause is removed.
We only have to scan the remainder here.
find({x: {lt: 7}}).sort({x: 1})
Rule of Thumb
Feature Requests
jira.mongodb.org
Support
groups.google.com/group/mongodb-user
Next up:
Sharding Details with Eliot